Loading Data

counts <- read.delim(
  file = here("data/raw/GBM_raw_gene_counts.csv"), 
  sep = " ", 
  row.names = 1, 
  check.names = F,
  ) 
head(counts)[c(1:15, ncol(counts)-5:ncol(counts))]

We loaded 3589 cells and 23368 genes.

metadata <- read.delim(
  file = here("data/raw/GBM_metadata.csv"), 
  sep = " ", 
  row.names = 1, 
  stringsAsFactors = TRUE,
)
head(metadata)
metadata <- metadata |> 
  select(!c(Sample.type, housekeeping_cluster, ends_with("color"))) |> 
  rename(Patient = Sample.name, Paper_cluster = Cluster_2d) |> 
  unite("Batch", Patient, Location, remove = FALSE) |> 
  relocate(Batch, Patient, Location, Selection) |> 
  mutate(Batch = as_factor(Batch))
head(metadata, n = 3)
table(metadata[c("Patient", "Location")])
##        Location
## Patient Distant Periphery Tumor
##   BT_S1       0        56   433
##   BT_S2       0       446   723
##   BT_S4       0       562   980
##   BT_S6      57       125   207
patient_data <- read_excel(
  here("data/raw/patient_data.xlsx"),
  range = "A2:V6",
  .name_repair = "unique_quiet"
  ) |> 
  rename(Patient = 1) |>
  rename_with(~ gsub(" ", "_", .x, fixed = TRUE)) |>
  rename_with(~ paste("Subtype", .x, sep = "_"), Classical:Proneural) |>
  rename_with(~ paste(.x, "expressing_cell_fraction", sep = "_"), CD274:B2M)
patient_data

Where WT = Wildtype, M = Methylated, NM = Non methylated, NT = Not tested.

options(Seurat.object.assay.calcn = TRUE)
obj <- CreateSeuratObject(
  counts = counts,
  meta.data = metadata,
  min.cells = 3,
  min.features = 100,
  )

We’re left with 3584 cells and 20721 genes.

QC

VlnPlot(
  obj,
  features = c("nFeature_RNA", "nCount_RNA", "ERCC_reads"),
  layer = "counts",
) &
  labs(x = NULL) &
  scale_x_discrete(labels = NULL, breaks = NULL)

VlnPlot(
  obj,
  features = "ERCC_to_non_ERCC",
  layer = "counts",
  y.max = 3,
) +
  labs(x = NULL) +
  scale_x_discrete(labels = NULL, breaks = NULL) +
  guides(fill="none")

n_feature_cutoff = 200
n_count_cutoff = 50000

FeatureScatter(
  obj,
  feature1 = "nCount_RNA", 
  feature2 = "nFeature_RNA",
  cols = "black"
) +
  NoLegend() +
  geom_hline(
    aes(yintercept = n_feature_cutoff), 
    color = "red", linetype = "dashed", 
  ) +
  geom_vline(
    aes(xintercept = n_count_cutoff), 
    color = "red", linetype = "dashed",
  )

obj <- subset(obj, subset = nFeature_RNA > n_feature_cutoff & nCount_RNA > n_count_cutoff)

104 cells filtered out during QC.

Cell Type Annotation

plot_cell_cls <- function(reduction) {
  (DimPlot(
  obj, 
  reduction = reduction,
  group.by = "predicted.subclass",
  cols = DiscretePalette(n = length(unique(obj$predicted.subclass)), palette = "polychrome"),
  pt.size = 0.5,
) | 
  FeaturePlot(
    obj, 
    reduction = reduction,
    features = "predicted.subclass.score",
    pt.size = 0.5,
  ))  / 
  (DimPlot(
    obj, 
    reduction = reduction,
    group.by = "Batch",
    cols = DiscretePalette(n = nlevels(obj$Batch), palette = "polychrome"),
    pt.size = 0.5,
  ) |
  DimPlot(
      obj, 
      reduction = reduction,
      group.by = "Selection",
      cols = DiscretePalette(n = nlevels(obj$Selection), palette = "polychrome"),
      pt.size = 0.5,
  ))
}
plot_cell_cls("ref.umap")

Transformation / DimRed (No Integration Version)

obj <- SCTransform(object = obj, verbose = FALSE) |> 
  RunPCA(verbose = FALSE)
ElbowPlot(object = obj, ndims = 50)

obj <- RunUMAP(object = obj, dims = 1:30, verbose = FALSE)

DimPlot(
  object = obj, 
  reduction = "umap",
  group.by = "Batch",
  cols = DiscretePalette(n = nlevels(obj$Batch), palette = "polychrome"),
  pt.size = 0.1,
) + 
  DimPlot(
    object = obj, 
    reduction = "umap",
    group.by = "Selection",
    pt.size = 0.1,
  )

Transformation / DimRed (With Integration This Time)

obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Batch)
obj <- SCTransform(object = obj, verbose = FALSE)
obj <- RunPCA(object = obj, verbose = FALSE)
ElbowPlot(object = obj, ndims = 50)

obj <- IntegrateLayers(
  object = obj,
  method = HarmonyIntegration,
  normalization.method = "SCT",
  orig.reduction = "pca", new.reduction = "harmony",
  assay = "SCT",
  verbose = FALSE
)
obj <- RunUMAP(object = obj, reduction = "harmony", dims = 1:30, reduction.name = "umap_harmony", verbose = F)
plot_cell_cls("umap_harmony")

Saving Results

saveRDS(
   object = obj,
   file = here("data/processed/01_initial_processing.Rds")
)
Session Info
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.4
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Kiev
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
##  [1] humancortexref.SeuratData_1.0.0 here_1.0.1                      Azimuth_0.5.0                  
##  [4] shinyBS_0.61.1                  Seurat_5.2.1                    SeuratObject_5.0.2             
##  [7] sp_2.2-0                        readxl_1.4.5                    lubridate_1.9.4                
## [10] forcats_1.0.0                   stringr_1.5.1                   dplyr_1.1.4                    
## [13] purrr_1.0.4                     readr_2.1.5                     tidyr_1.3.1                    
## [16] tibble_3.2.1                    ggplot2_3.5.1                   tidyverse_2.0.0                
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.5                          ProtGenerics_1.38.0               matrixStats_1.5.0                
##   [4] spatstat.sparse_3.1-0             bitops_1.0-9                      DirichletMultinomial_1.48.0      
##   [7] TFBSTools_1.44.0                  httr_1.4.7                        RColorBrewer_1.1-3               
##  [10] tools_4.4.2                       sctransform_0.4.1                 R6_2.6.1                         
##  [13] DT_0.33                           lazyeval_0.2.2                    uwot_0.2.3                       
##  [16] rhdf5filters_1.18.1               withr_3.0.2                       gridExtra_2.3                    
##  [19] rematch_2.0.0                     progressr_0.15.1                  cli_3.6.4                        
##  [22] Biobase_2.66.0                    spatstat.explore_3.4-2            fastDummies_1.7.5                
##  [25] EnsDb.Hsapiens.v86_2.99.0         shinyjs_2.1.0                     labeling_0.4.3                   
##  [28] sass_0.4.9                        spatstat.data_3.1-6               ggridges_0.5.6                   
##  [31] pbapply_1.7-2                     Rsamtools_2.22.0                  R.utils_2.13.0                   
##  [34] harmony_1.2.3                     parallelly_1.43.0                 BSgenome_1.74.0                  
##  [37] rstudioapi_0.17.1                 RSQLite_2.3.9                     generics_0.1.3                   
##  [40] BiocIO_1.16.0                     gtools_3.9.5                      ica_1.0-3                        
##  [43] spatstat.random_3.3-3             googlesheets4_1.1.1               GO.db_3.20.0                     
##  [46] Matrix_1.7-3                      ggbeeswarm_0.7.2                  S4Vectors_0.44.0                 
##  [49] abind_1.4-8                       R.methodsS3_1.8.2                 lifecycle_1.0.4                  
##  [52] yaml_2.3.10                       SummarizedExperiment_1.36.0       glmGamPoi_1.18.0                 
##  [55] rhdf5_2.50.2                      SparseArray_1.6.2                 Rtsne_0.17                       
##  [58] grid_4.4.2                        blob_1.2.4                        promises_1.3.2                   
##  [61] shinydashboard_0.7.2              crayon_1.5.3                      pwalign_1.2.0                    
##  [64] miniUI_0.1.1.1                    lattice_0.22-6                    cowplot_1.1.3                    
##  [67] GenomicFeatures_1.58.0            annotate_1.84.0                   KEGGREST_1.46.0                  
##  [70] pillar_1.10.1                     knitr_1.50                        GenomicRanges_1.58.0             
##  [73] rjson_0.2.23                      future.apply_1.11.3               codetools_0.2-20                 
##  [76] fastmatch_1.1-6                   glue_1.8.0                        spatstat.univar_3.1-2            
##  [79] data.table_1.17.0                 vctrs_0.6.5                       png_0.1-8                        
##  [82] spam_2.11-1                       cellranger_1.1.0                  gtable_0.3.6                     
##  [85] poweRlaw_1.0.0                    cachem_1.1.0                      xfun_0.51                        
##  [88] Signac_1.14.0                     S4Arrays_1.6.0                    mime_0.13                        
##  [91] survival_3.8-3                    gargle_1.5.2                      RcppRoll_0.3.1                   
##  [94] fitdistrplus_1.2-2                ROCR_1.0-11                       nlme_3.1-167                     
##  [97] bit64_4.6.0-1                     RcppAnnoy_0.0.22                  rprojroot_2.0.4                  
## [100] GenomeInfoDb_1.42.3               bslib_0.9.0                       irlba_2.3.5.1                    
## [103] vipor_0.4.7                       KernSmooth_2.23-26                SeuratDisk_0.0.0.9021            
## [106] colorspace_2.1-1                  seqLogo_1.72.0                    BiocGenerics_0.52.0              
## [109] DBI_1.2.3                         ggrastr_1.0.2                     tidyselect_1.2.1                 
## [112] bit_4.6.0                         compiler_4.4.2                    curl_6.2.2                       
## [115] hdf5r_1.3.12                      DelayedArray_0.32.0               plotly_4.10.4                    
## [118] rtracklayer_1.66.0                scales_1.3.0                      caTools_1.18.3                   
## [121] lmtest_0.9-40                     rappdirs_0.3.3                    digest_0.6.37                    
## [124] goftest_1.2-3                     presto_1.0.0                      spatstat.utils_3.1-3             
## [127] rmarkdown_2.29                    RhpcBLASctl_0.23-42               XVector_0.46.0                   
## [130] htmltools_0.5.8.1                 pkgconfig_2.0.3                   sparseMatrixStats_1.18.0         
## [133] MatrixGenerics_1.18.1             fastmap_1.2.0                     ensembldb_2.30.0                 
## [136] rlang_1.1.5                       htmlwidgets_1.6.4                 UCSC.utils_1.2.0                 
## [139] DelayedMatrixStats_1.28.1         shiny_1.10.0                      farver_2.1.2                     
## [142] jquerylib_0.1.4                   zoo_1.8-13                        jsonlite_2.0.0                   
## [145] BiocParallel_1.40.2               R.oo_1.27.0                       RCurl_1.98-1.17                  
## [148] magrittr_2.0.3                    GenomeInfoDbData_1.2.13           dotCall64_1.2                    
## [151] patchwork_1.3.0                   Rhdf5lib_1.28.0                   munsell_0.5.1                    
## [154] Rcpp_1.0.14                       reticulate_1.42.0                 stringi_1.8.7                    
## [157] zlibbioc_1.52.0                   MASS_7.3-65                       plyr_1.8.9                       
## [160] parallel_4.4.2                    listenv_0.9.1                     ggrepel_0.9.6                    
## [163] deldir_2.0-4                      CNEr_1.42.0                       Biostrings_2.74.1                
## [166] splines_4.4.2                     tensor_1.5                        hms_1.1.3                        
## [169] BSgenome.Hsapiens.UCSC.hg38_1.4.5 igraph_2.1.4                      spatstat.geom_3.3-6              
## [172] RcppHNSW_0.6.0                    reshape2_1.4.4                    stats4_4.4.2                     
## [175] TFMPvalue_0.0.9                   XML_3.99-0.18                     evaluate_1.0.3                   
## [178] renv_1.1.2                        BiocManager_1.30.25               JASPAR2020_0.99.10               
## [181] tzdb_0.5.0                        httpuv_1.6.15                     RANN_2.6.2                       
## [184] polyclip_1.10-7                   future_1.34.0                     SeuratData_0.2.2.9002            
## [187] scattermore_1.2                   xtable_1.8-4                      restfulr_0.0.15                  
## [190] AnnotationFilter_1.30.0           RSpectra_0.16-2                   later_1.4.1                      
## [193] googledrive_2.1.1                 viridisLite_0.4.2                 beeswarm_0.4.0                   
## [196] memoise_2.0.1                     AnnotationDbi_1.68.0              GenomicAlignments_1.42.0         
## [199] IRanges_2.40.1                    cluster_2.1.8                     timechange_0.3.0                 
## [202] globals_0.16.3